import os
import re
import pandas as pd
import xport
import openpyxl
from openpyxl import Workbook
import tkinter as tk
from tkinter import filedialog

def is_nonascii(text):
    """Return True when *text* contains at least one non-ASCII character.

    A character is treated as ASCII when its code point is below 128, so an
    empty string yields False.
    """
    return any(ord(ch) >= 128 for ch in text)

def find_nonascii_in_xpt(file_path):
    """Collect every string cell of an XPT file that contains non-ASCII text.

    Args:
        file_path: Path of the XPT file to scan; it is opened in binary mode
            and loaded with ``xport.to_dataframe``.

    Returns:
        A list of ``(file name, column, row number, value)`` tuples, one per
        offending cell, gathered column by column. The file name is the base
        name of *file_path*, and the row number is the DataFrame index label
        plus one.
    """
    with open(file_path, 'rb') as stream:
        frame = xport.to_dataframe(stream)

        # The base name is identical for every record of this file.
        file_name = os.path.basename(file_path)

        # Non-string cells are skipped by the isinstance check.
        return [
            (file_name, col, idx + 1, cell)
            for col in frame.columns
            for idx, cell in frame[col].items()
            if isinstance(cell, str) and is_nonascii(cell)
        ]

def main():
    """Scan a user-chosen folder of XPT files and export non-ASCII values.

    The user picks a folder through a directory dialog. Every file in it
    whose name ends with ``.xpt`` (case-insensitive) is scanned with
    ``find_nonascii_in_xpt``, and the collected records are written to an
    Excel workbook whose path is chosen through a save dialog. The sheet
    gets fixed column widths and text wrapping on the value column.

    Returns:
        None. The function returns early, writing nothing, when either
        dialog is cancelled.
    """
    root = tk.Tk()
    root.withdraw()  # Only the dialogs are needed; hide the empty main window.

    try:
        xpt_folder = filedialog.askdirectory(title='XPTフォルダを選択してください')
        if not xpt_folder:
            # Dialog cancelled: os.listdir('') would raise FileNotFoundError.
            return

        # Sorted so the report order does not depend on os.listdir ordering;
        # the extension match is case-insensitive so '.XPT' files are included.
        xpt_files = sorted(
            os.path.join(xpt_folder, f)
            for f in os.listdir(xpt_folder)
            if f.lower().endswith('.xpt')
        )

        all_nonascii_records = []
        for xpt_file in xpt_files:
            all_nonascii_records.extend(find_nonascii_in_xpt(xpt_file))

        df = pd.DataFrame(all_nonascii_records, columns=['XPTファイル名', '変数名', '行番号', '変数の値'])

        excel_output = filedialog.asksaveasfilename(title='出力Excelファイルを選択してください', initialfile='check-non-ascii.xlsx', filetypes=[("Excel files", "*.xlsx")], defaultextension=".xlsx")
        if not excel_output:
            # Dialog cancelled: there is no path to write to.
            return
    finally:
        # Release the hidden Tk root once no more dialogs are needed.
        root.destroy()

    # Write the DataFrame to the Excel file.
    with pd.ExcelWriter(excel_output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)

        # Apply formatting to the sheet that was just written.
        ws = writer.book.active
        ws.column_dimensions['A'].width = 15
        ws.column_dimensions['B'].width = 10
        ws.column_dimensions['C'].width = 10
        ws.column_dimensions['D'].width = 120

        # Wrap text in every cell of the fourth column (the values).
        wrap = openpyxl.styles.Alignment(wrap_text=True)
        for cell in ws['D']:
            cell.alignment = wrap

        # No explicit wb.save() here: the ExcelWriter context manager saves
        # the workbook to excel_output when the block exits.

# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()